In [1]:
from numpy.linalg import inv
import numpy as np
from math import pi, sqrt, gamma
from scipy.stats import t
import matplotlib.pyplot as plt
In [2]:
%matplotlib inline
In [3]:
def my_t(x, df):
_ = (df + 1.)/2.
return gamma(_) / (sqrt(pi* df) * gamma(df/2.) * (1. + x**2/df) ** (_))
In [4]:
def my_t(x, df):
_ = lambda x : (df + x)/2.
return gamma(_(1)) / (sqrt(pi* df) * gamma(_(0)) * (1. + x**2/df) ** (_(1)))
In [5]:
my_t(0, 2.74)
Out[5]:
In [6]:
rv = t(2.74)
rv.pdf(0)
Out[6]:
where $$ \delta(y, \mu; \Sigma) = (y-\mu)^T \Sigma^{-1} (y-\mu) $$
In [7]:
def squared_distance(x, mu, sigma):
diff = (x - mu)
return diff.dot(inv(sigma)).dot(diff.T)
In [8]:
def multivariate(x, mu, sigma, df):
p = x.shape[1]
f = lambda _ : (df+_)/2.
det = np.linalg.det(sigma) ** (-1./2.)
param0 = gamma(f(p))
param1 = (np.pi * df) ** (-p/2.)
param2 = gamma(f(0)) ** -1.
delta = x - mu
param3 = 1. + (delta.dot(inv(sigma)).dot(delta.T))/df
param3 = param3 ** (-f(p))
#return param3
return param0 * det * param1 * param2 * param3
In [12]:
np.linalg.det([[1,0],[0,1]]) ** (-1./2.)
Out[12]:
In [18]:
x = np.array([1,1])
mu = np.array([3,3])
In [19]:
dec = np.linalg.cholesky([[1,0],[0,1]])
In [22]:
(np.linalg.solve(dec, x - mu) ** 2).sum(axis=0)
Out[22]:
In [11]:
multivariate(np.array([[1,1]]), [3,3], [[1,0],[0,1]], 1)
Out[11]:
In [143]:
x1, y1 = np.mgrid[-2.5:2.5:.01, -2.5:2.5:.01]
In [176]:
XY = []
for xy in zip(x1, y1):
sample = np.array(xy).T
xy_ = []
for _ in sample:
l = multivariate(_.reshape(1,-1), [.0,.0],[[1.,0.],[0,1.]],100)
xy_.extend(l[0])
XY.append(xy_)
XY = np.array(XY)
print XY.shape
In [177]:
plt.contour(x1, y1, XY)
plt.hlines(1, -2.5, 2.5)
plt.vlines(1, -2.5, 2.5)
plt.show()
In [162]:
x1, y1 = np.mgrid[-2.5:2.5:.01, -2.5:2.5:.01]
In [180]:
XY = []
for xy in zip(x1, y1):
sample = np.array(xy).T
xy_ = []
for _ in sample:
l = multivariate(_.reshape(1,-1), [.0,.0],[[.1,.0],[.0,.2]],100)
xy_.extend(l[0])
XY.append(xy_)
XY = np.array(XY)
print XY.shape
In [181]:
plt.contour(x1, y1, XY)
plt.show()
In [151]:
#written by Enzo Michelangeli, style changes by josef-pktd
# Student's T random variable
def multivariate_t_rvs(m, S, df=np.inf, n=1):
'''generate random variables of multivariate t distribution
Parameters
----------
m : array_like
mean of random variable, length determines dimension of random variable
S : array_like
square array of covariance matrix
df : int or float
degrees of freedom
n : int
number of observations, return random array will be (n, len(m))
Returns
-------
rvs : ndarray, (n, len(m))
each row is an independent draw of a multivariate t distributed
random variable
'''
m = np.asarray(m)
d = len(m)
if df == np.inf:
x = 1.
else:
x = np.random.chisquare(df, n)/df
z = np.random.multivariate_normal(np.zeros(d),S,(n,))
return m + z/np.sqrt(x)[:,None] # same output format as random.multivariate_normal
In [156]:
x1 = multivariate_t_rvs([0,0], [[1,0],[0,1]],9, 300)
x2 = multivariate_t_rvs([1.5,1.5], [[.5,1.],[.1,.7]],9, 300)
In [161]:
plt.scatter(x1[:,0], x1[:,1], alpha=.5)
plt.scatter(x2[:,0], x2[:,1], alpha=.5)
Out[161]:
EM Algorithm for ML estimation of the parameters $\mu$ and $\Sigma$ with known degree of freedom.